#   LibAut
#       onset regressions, multiple
#       Table 1 column (model 1), Table C3
#   Juraj Medzihorsky
#   2021-08-12


library(arm)
library(mgcv)
library(logistf)
library(parallel)
options(mc.cores=2)
options(stringsAsFactors=F)
source('helpers.R')
tstat <- function(x) coef(x)/se.coef(x)

#   sessionInfo()
##  R version 3.6.3 (2020-02-29)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 16.04.7 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.9.so
##
##  locale:
##   [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_GB.UTF-8       LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  stats     graphics  grDevices utils     datasets  methods   base
##
##  other attached packages:
##  [1] logistf_1.24   mgcv_1.8-28    nlme_3.1-152   arm_1.9-3      lme4_1.1-18-1  Matrix_1.3-4   MASS_7.3-54
##  [8] nvimcom_0.9-28 colorout_1.2-0
##
##  loaded via a namespace (and not attached):
##   [1] Rcpp_1.0.6           compiler_3.6.3       pillar_1.6.1         nloptr_1.2.1         tools_3.6.3
##   [6] rpart_4.1-13         lifecycle_1.0.0      tibble_3.1.2         lattice_0.20-44      pkgconfig_2.0.3
##  [11] rlang_0.4.11         DBI_1.0.0            coda_0.19-4          dplyr_1.0.6          generics_0.1.0
##  [16] vctrs_0.3.8          nnet_7.3-16          grid_3.6.3           tidyselect_1.1.1     mice_3.3.0
##  [21] glue_1.4.2           R6_2.5.0             fansi_0.5.0          survival_2.44-1.1    mitml_0.3-6
##  [26] minqa_1.2.4          tidyr_1.1.3          purrr_0.3.4          magrittr_2.0.1       backports_1.2.1
##  [31] operator.tools_1.6.3 formula.tools_1.7.1  ellipsis_0.3.2       splines_3.6.3        assertthat_0.2.1
##  [36] abind_1.4-5          utf8_1.2.1           broom_0.7.3          pan_1.6              crayon_1.4.1
##  [41] jomo_2.6-5

#------------------------------------------------------------------------------
#   data load

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)
figs_dir <- gsub('code$', 'figures', code_dir)
tabs_dir <- gsub('code$', 'tables', code_dir)

setwd(data_dir)
dat <- readRDS('onset_20210810.rds')
eps <- read.csv('eps.csv')
setwd(code_dir)
dat <- subset(dat, onset_eligible%in%1)

#------------------------------------------------------------------------------
#   data prep

#   get EP shares
tab_dem <- aggregate(eps$dem_ep, by=list(eps$year), FUN=mean, na.rm=T)
tab_aut <- aggregate(eps$aut_ep, by=list(eps$year), FUN=mean, na.rm=T)
names(tab_dem) <- c('year', 'share_dem_ep')
names(tab_aut) <- c('year', 'share_aut_ep')

tab <- merge(tab_dem, tab_aut, by='year')
tab_lag <- tab
tab_lag$year <- tab$year + 1
names(tab_lag)[2:3] <- paste0('lag1_', names(tab)[2:3])

tab_tab <- merge(tab, tab_lag)

dat <- merge(dat, tab_tab, by='year')

weird <- which(abs(dat$lag1_e_migdpgro)>1)
dat$lag1_e_migdpgro[weird] <- NA

dat$logpop <- log(dat$e_mipopula) 

dat$reg_fac <- as.factor(dat$e_regionpol_6C)

#------------------------------------------------------------------------------
#   models

#   formulas
fy0 <- libaut_start ~ 1
#
lin_1 <- ~ . + 
    reg_fac +
    lag1_e_migdppcln + 
    lag1_e_migdpgro + 
    lag1_logpop +
    lag1_v2pepwrsoc + 
    lag1_v2xeg_eqdr + 
    lag1_v2xnp_pres + 
    lag1_v2csprtcpt +
    lag1_v2x_polyarchy +
    lag1_excl_region_edi + 
    lag1_e_miinteco + 
    lag1_e_miinterc +
    lag1_share_dem_ep +
    lag1_share_aut_ep  
#
lin_2 <- ~ . + 
    I(1e-1*(year-1990)) + I((1e-1*(year-1990))^2) + I((1e-1*(year-1990))^3) 
#
fl1 <- update(fy0, lin_1)
fl2 <- update(fl1, lin_2)


#   gaussian-linear GLM under MLE
o0 <- lm(fy0, data=dat)
o1 <- lm(fl1, data=dat)
o2 <- lm(fl2, data=dat)

#   bernoulli-logistic GLM under MLE
l0 <- glm(fy0, data=dat, family=binomial('logit'))
l1 <- glm(fl1, data=dat, family=binomial('logit'))
l2 <- glm(fl2, data=dat, family=binomial('logit'))

#   bernoulli-logistic GLM under FPL
mcont <- logistf.control(maxit=1e4, maxstep=1e4)
f0 <- logistf(fy0, data=dat, control=mcont)
f1 <- logistf(fl1, data=dat, control=mcont)
f2 <- logistf(fl2, data=dat, control=mcont)

#------------------------------------------------------------------------------
#   tabulate

bb <- 
    data.frame(o1=c(round(coef(o1),2), rep(NA,3)),
               o2=c(round(coef(o2),2)),
               l1=c(round(coef(l1),2), rep(NA,3)),
               l2=c(round(coef(l2),2)),
               f1=c(round(coef(f1),2), rep(NA,3)),
               f2=c(round(coef(f2),2)))

bt <- paste(apply(bb, 1, paste, collapse=' & '), '\\\\')

ci <-
    data.frame(
        o1=c(paste0('(', round(confint(o1),2)[,1], ', ', round(confint(o1),2)[,2], ')'), rep('',3)),
        o2=  paste0('(', round(confint(o2),2)[,1], ', ', round(confint(o2),2)[,2], ')'),
        l1=c(paste0('(', round(confint(l1),2)[,1], ', ', round(confint(l1),2)[,2], ')'), rep('',3)),
        l2=  paste0('(', round(confint(l2),2)[,1], ', ', round(confint(l2),2)[,2], ')'),
        f1=c(paste0('(', round(confint(f1),2)[,1], ', ', round(confint(f1),2)[,2], ')'), rep('',3)),
        f2=  paste0('(', round(confint(f2),2)[,1], ', ', round(confint(f2),2)[,2], ')'))

ct <- paste(apply(ci, 1, paste, collapse=' & '), '\\\\')

bt <- gsub('-', '$-$', bt)
ct <- gsub('-', '$-$', ct)
bt <- gsub('NA', '', bt)
ct <- gsub('NA', '', ct)

tab <- character(length=4*length(bt))
for (i in 1:length(bt)) {
    ii <- (i-1)*3 + i
    tab[ii] <- rownames(bb)[i]
    tab[ii+1] <- paste0(' & ', bt[i])
    tab[ii+2] <- paste0(' & ', ct[i])
}

setwd(tabs_dir)
writeLines(tab, con='Table_C3.tex')


#   this goes to the bottom of Table C.3
round(c(AIC(o1), AIC(o2), AIC(l1), AIC(l2), f1$aic, f2$aic), 2)
#   -2153.60 -2183.86  1721.33  1692.88

#   this goes into the legend of Table C.3
c(nrow(l1$model), nrow(l2$model))
#   5449 5449
table(l1$model$libaut_start)
#   5214  235

#   LPO-CV AUC-ROC are in a different script

#   SCRIPT END